Load required libraries

library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(here)
## here() starts at /Users/petermacpherson/Documents/Documents - Peter’s Mac mini/Projects/AQ-MTB_2025-05-14/Work/Data/aq_mtb
library(brms)
## Warning: package 'brms' was built under R version 4.3.3
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.3.3
## Loading 'brms' package (version 2.22.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## 
## Attaching package: 'brms'
## 
## The following object is masked from 'package:stats':
## 
##     ar
library(tidybayes)
## Warning: package 'tidybayes' was built under R version 4.3.3
## 
## Attaching package: 'tidybayes'
## 
## The following objects are masked from 'package:brms':
## 
##     dstudent_t, pstudent_t, qstudent_t, rstudent_t
library(sf)
## Warning: package 'sf' was built under R version 4.3.3
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(mgcv)
## Warning: package 'mgcv' was built under R version 4.3.3
## Loading required package: nlme
## Warning: package 'nlme' was built under R version 4.3.3
## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse
## 
## This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.
## 
## Attaching package: 'mgcv'
## 
## The following objects are masked from 'package:brms':
## 
##     s, t2
library(priorsense)
## Warning: package 'priorsense' was built under R version 4.3.3
library(osmdata)
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
library(nngeo) 

Air quality data

By summary, a random household survey was conducted in Blantyre Malawi during 2019-2020 (pre-COVID in Malawi) as part of a TB cluster randomised trial pre-intervention prevalence survey (sumamrised here: https://europepmc.org/article/MED/37862284). Fieldworkers carried purple air monitors attached to their backpacks, with measurements set to read every 90 seconds. Not all fieldworkers had a monitor. Previously, Helen Savage cleaned these monitor data, and merged to household questionnaire data, filtering measurements by interview times. Therefore, measurements are indoor household measurements.

Cluster boundaries (72 in total) are based on Ministry of Health Community Health Worker catchment areas, refined in collaboration with researchers. These were the unit of randomisation for the SCALE cluster randomised trial.

All air quality measurements were then taken from households within the 72 SCALE cluster.

load("input_data/aq_in.RData") #cleaned air-quality dataset
load("input_data/scale_72_clusters.rda") #SCALE clusters

Get data into correct shape and aggregate measurements per household. Note as purple air devices took measurements every 90 seconds, mostly housholds surveyed have multiple measurements. Here, for convenience, we take the mean of these measurement per household.

aq_dat <- aq_in %>%
  select(datetime, h02cl_id, hh_id, hh_lat, hh_lon, temp_c, current_humidity, mean_pm_2_5, distance_km) %>%
 mutate(
   datetime = ymd_hms(datetime),
   hour = hour(datetime) + minute(datetime)/60,
   doy = yday(datetime),
   x = hh_lon,
   y = hh_lat) %>%
  group_by(hh_id) %>%
  reframe(h02cl_id = h02cl_id,
          x = x,
          y = y,
          mean_doy = mean(doy, na.rm=TRUE),
          mean_pm2_5 = mean(mean_pm_2_5), 
          mean_temp_c = mean(temp_c, na.rm=TRUE),
          mean_current_humidity = mean(current_humidity, na.rm=TRUE)) %>%
  distinct() %>%
  mutate(log_pm2_5 = log(mean_pm2_5))


#we will use Fourier series components with multiple harmonics to model day-of-the year effects
aq_dat <- aq_dat %>%
  mutate(
    sin_doy1 = sin(2 * pi * mean_doy / 365),
    cos_doy1 = cos(2 * pi * mean_doy / 365),
    sin_doy2 = sin(4 * pi * mean_doy / 365),
    cos_doy2 = cos(4 * pi * mean_doy / 365),
    sin_doy3 = sin(6 * pi * mean_doy / 365),
    cos_doy3 = cos(6 * pi * mean_doy / 365)
  )

#check some variables
aq_dat %>%
  select(hh_id, log_pm2_5, mean_temp_c, mean_current_humidity, mean_doy) %>%
  pivot_longer(cols = c(log_pm2_5, mean_temp_c, mean_current_humidity, mean_doy)) %>%
  ggplot() +
  geom_histogram(aes(x=value, fill=name)) +
  facet_wrap(name~., scales = "free_x")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#PM2.5 distribution by day of the year
aq_dat %>%
  ggplot() +
  geom_jitter(aes(x = mean_doy, y=log_pm2_5), colour="darkred", alpha=0.5) +
  theme_ggdist() +
  theme(panel.background = element_rect(colour="grey78"))

#Note that there was no data collection in April of either year


#plot the measurements on a map
aq_dat %>%
  ggplot() +
  geom_sf(data=scale_72_clusters, fill=NA) +
  geom_point(aes(x=x, y=y, colour=log_pm2_5), size=0.1)+
  scale_color_viridis_c() +
  theme_ggdist() +
  theme(panel.background = element_rect(colour="grey78")) 
## old-style crs object detected; please recreate object with a recent sf::st_crs()
## old-style crs object detected; please recreate object with a recent sf::st_crs()

#Note the warning about old coordinate reference system - we will fix this below.

Covariates

Population counts and density

#read in the .tiff image of population count from Worldpop
#https://hub.worldpop.org/geodata/summary?id=1560
mw_100m <- stars::read_stars("input_data/mwi_ppp_2020.tif")


#define buffer around
buffer_m <- 500

#approximate conversion factor (degrees per meter)
#adjust for latitude for longitude direction
lat_mean <- mean(aq_dat$y)
deg_per_m_lat <- 1 / 111320  # Rough conversion for latitude
deg_per_m_lon <- 1 / (111320 * cospi(lat_mean / 180))  # Adjust for latitude

#expandbounding box with buffer
bbox_buffered <- st_bbox(c(
  xmin = min(aq_dat$x) - buffer_m * deg_per_m_lon,
  xmax = max(aq_dat$x) + buffer_m * deg_per_m_lon,
  ymin = min(aq_dat$y) - buffer_m * deg_per_m_lat,
  ymax = max(aq_dat$y) + buffer_m * deg_per_m_lat
), crs = st_crs(mw_100m))

#crop worldpop raster to buffered bbox
mw_100m_cropped <- st_crop(mw_100m, bbox_buffered)

#plot
plot(mw_100m_cropped)

#there are a small number of cells in the centre that are NA
#I think these are the military radar on Mount Soche,
#so no expected population
#replace with 0
mw_100m_cropped[[1]][is.na(mw_100m_cropped[[1]])] <- 0

#did it work?
plot(mw_100m_cropped)

#xonvert to sf object
aq_sf <- st_as_sf(aq_dat, coords = c("x", "y"), crs = st_crs(mw_100m_cropped))

#merge with the population count data
aq_sf$pop_density <- stars::st_extract(mw_100m_cropped, aq_sf)[[1]]

#get population density
aq_model_data <- aq_sf %>%
  st_drop_geometry() %>%
  as_tibble() %>%
  mutate(pop_density_km2 = pop_density / 0.01) %>%
  left_join(aq_dat %>% select(hh_id, x, y))
## Joining with `by = join_by(hh_id)`
#fix the coordinate crs of the scale clusters
scale_72_clusters <-  st_transform(scale_72_clusters, st_crs(mw_100m))
## old-style crs object detected; please recreate object with a recent sf::st_crs()

Building density (i.e. we get the building footprints, and calculate their combined footprint area per grid cell, then calculate what percentage of the grid cell is “building footprint”) https://sites.research.google/gr/open-buildings/#open-buildings-download

Using Version 2 here - can update later to V3, with better precision, and year-specific data. (I think this is for 2020 currently)

#read in the data set
#is a stars dataset
buildings <- read_rds("input_data/blantyre_buff_buildings.rds")

#match the crs to the population data
buildings <-  st_transform(buildings, st_crs(mw_100m))

#convert stars object to polygons
mw_100m_grid_sf <- st_as_sf(mw_100m_cropped, as_points = FALSE, merge = FALSE)

#add grid cell ID for grouping
mw_100m_grid_sf <- mw_100m_grid_sf %>%
  mutate(grid_id = row_number())

#join building footprint data to the population data
buildings_with_grid <- st_join(buildings, mw_100m_grid_sf, left = FALSE)

#filter by confidence of building footprint, and by footprint size
#in Hannah R's previous experiements, this was a good compromise
#and matches well with home visits on the ground.
buildings_with_grid <- buildings_with_grid %>%
  filter(area_in_meters>20) %>%
  filter(area_in_meters<300) %>%
  filter(confidence>0.69)

#sum building footprint per grid cell
building_area_per_cell <- buildings_with_grid %>%
  group_by(grid_id) %>%
  summarise(total_building_area_m2 = sum(area_in_meters, na.rm = TRUE))

#cell area in m squared
cell_area_m2 <- 100 * 100

#compute percentage coverage
building_area_per_cell <- building_area_per_cell %>%
  mutate(building_coverage_pct = (total_building_area_m2 / cell_area_m2) * 100)

#join
mw_100m_grid_sf <- mw_100m_grid_sf %>%
  left_join(building_area_per_cell %>% st_drop_geometry(), by = "grid_id") %>%
  mutate(building_coverage_pct = replace_na(building_coverage_pct, 0))

#check by plotting
mw_100m_grid_sf %>%
  mutate(x = st_coordinates(st_centroid(.))[, 1],
         y = st_coordinates(st_centroid(.))[, 2]) %>%
  ggplot() +
  geom_tile(aes(x=x, y=y, fill=building_coverage_pct)) +
  scale_fill_viridis_c(option = "cividis") +
  theme_ggdist() +
  theme(panel.background = element_rect(colour = "grey78"))
## Warning: There were 2 warnings in `stopifnot()`.
## The first warning was:
## ℹ In argument: `x = st_coordinates(st_centroid(.))[, 1]`.
## Caused by warning:
## ! st_centroid assumes attributes are constant over geometries
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

#Look at building footprints in one cluster

cluster_buildings <- st_join(scale_72_clusters, buildings)

cluster_buildings$footprints_geom <- st_as_sfc(cluster_buildings$footprints, crs = st_crs(cluster_buildings))
st_geometry(cluster_buildings) <- "footprints_geom"

cluster_buildings %>%
  filter(cluster =="c1") %>%
  ggplot() +
  geom_sf(fill = "mediumseagreen") +
  geom_sf(data = scale_72_clusters %>% filter(cluster == "c1"), fill=NA)

Distance to the nearest main road. I.e. because we think that PM2.5 exposure can come from road vehicles.

We use Open Street Map (OSM) data to get the main roads (there are few of these in Blantyre… but see the most traffic). Later could consider including some smaller road…

#get the OSM features.
q <- opq(bbox = bbox_buffered) %>%
  add_osm_feature(key = 'highway', 
                  value = c('primary', 'secondary', 'tertiary', 
                            'trunk', 'motorway'))

#import
osm_roads <- osmdata_sf(q)

##get ready.
main_roads <- osm_roads$osm_lines
main_roads <- st_transform(main_roads, st_crs(mw_100m))

#plot roads
main_roads %>%
  ggplot() +
  geom_sf(data = scale_72_clusters, colour="darkred") +
  geom_sf() 

#crop to grid
main_roads_cropped <- st_crop(main_roads, bbox_buffered)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
#plot again.
main_roads_cropped %>%
  ggplot() +
  geom_sf(data = scale_72_clusters, colour="darkred") +
  geom_sf() 

#get centroids of grid cells
grid_centroids <- st_centroid(mw_100m_grid_sf)
## Warning: st_centroid assumes attributes are constant over geometries
#use nearest neighbour search to get the distance from the grid cell centroid to the nearest main road.
nearest_roads <- st_nn(grid_centroids, main_roads_cropped, k = 1, returnDist = TRUE)
## lines or polygons
##   |                                                                              |                                                                      |   0%  |                                                                              |                                                                      |   1%  |                                                                              |=                                                                     |   1%  |                                                                              |=                                                                     |   2%  |                                                                              |==                                                                    |   2%  |                                                                              |==                                                                    |   3%  |                                                                              |==                                                                    |   4%  |                                                                              |===                                                                   |   4%  |                                                                              |===                                                                   |   5%  |                                                                              |====                                                                  |   5%  |                                                                              |====                                                                  |   6%  |                                                                              |=====                                                                 |   6%  |                                                                              |=====                                                                 |   7%  |                                                                              |=====                                                                 |   8%  |                                                                              |======                                                                |   8%  |                                                                              |======                                                                |   9%  |                                                                              |=======                                                               |   9%  |                                                                              |=======                                                               |  10%  |                                                                              |=======                                                               |  11%  |                                                                              |========                                                              |  11%  |                                                                              |========                                                              |  12%  |                                                                              |=========                                                             |  12%  |                                                                              |=========                                                             |  13%  |                                                                              |=========                                                             |  14%  |                                                                              |==========                                                            |  14%  |                                                                              |==========                                                            |  15%  |                                                                              |===========                                                           |  15%  |                                                                              |===========                                                           |  16%  |                                                                              |============                                                          |  16%  |                                                                              |============                                                          |  17%  |                                                                              |============                                                          |  18%  |                                                                              |=============                                                         |  18%  |                                                                              |=============                                                         |  19%  |                                                                              |==============                                                        |  19%  |                                                                              |==============                                                        |  20%  |                                                                              |==============                                                        |  21%  |                                                                              |===============                                                       |  21%  |                                                                              |===============                                                       |  22%  |                                                                              |================                                                      |  22%  |                                                                              |================                                                      |  23%  |                                                                              |================                                                      |  24%  |                                                                              |=================                                                     |  24%  |                                                                              |=================                                                     |  25%  |                                                                              |==================                                                    |  25%  |                                                                              |==================                                                    |  26%  |                                                                              |===================                                                   |  26%  |                                                                              |===================                                                   |  27%  |                                                                              |===================                                                   |  28%  |                                                                              |====================                                                  |  28%  |                                                                              |====================                                                  |  29%  |                                                                              |=====================                                                 |  29%  |                                                                              |=====================                                                 |  30%  |                                                                              |=====================                                                 |  31%  |                                                                              |======================                                                |  31%  |                                                                              |======================                                                |  32%  |                                                                              |=======================                                               |  32%  |                                                                              |=======================                                               |  33%  |                                                                              |=======================                                               |  34%  |                                                                              |========================                                              |  34%  |                                                                              |========================                                              |  35%  |                                                                              |=========================                                             |  35%  |                                                                              |=========================                                             |  36%  |                                                                              |==========================                                            |  36%  |                                                                              |==========================                                            |  37%  |                                                                              |==========================                                            |  38%  |                                                                              |===========================                                           |  38%  |                                                                              |===========================                                           |  39%  |                                                                              |============================                                          |  39%  |                                                                              |============================                                          |  40%  |                                                                              |============================                                          |  41%  |                                                                              |=============================                                         |  41%  |                                                                              |=============================                                         |  42%  |                                                                              |==============================                                        |  42%  |                                                                              |==============================                                        |  43%  |                                                                              |==============================                                        |  44%  |                                                                              |===============================                                       |  44%  |                                                                              |===============================                                       |  45%  |                                                                              |================================                                      |  45%  |                                                                              |================================                                      |  46%  |                                                                              |=================================                                     |  46%  |                                                                              |=================================                                     |  47%  |                                                                              |=================================                                     |  48%  |                                                                              |==================================                                    |  48%  |                                                                              |==================================                                    |  49%  |                                                                              |===================================                                   |  49%  |                                                                              |===================================                                   |  50%  |                                                                              |===================================                                   |  51%  |                                                                              |====================================                                  |  51%  |                                                                              |====================================                                  |  52%  |                                                                              |=====================================                                 |  52%  |                                                                              |=====================================                                 |  53%  |                                                                              |=====================================                                 |  54%  |                                                                              |======================================                                |  54%  |                                                                              |======================================                                |  55%  |                                                                              |=======================================                               |  55%  |                                                                              |=======================================                               |  56%  |                                                                              |========================================                              |  56%  |                                                                              |========================================                              |  57%  |                                                                              |========================================                              |  58%  |                                                                              |=========================================                             |  58%  |                                                                              |=========================================                             |  59%  |                                                                              |==========================================                            |  59%  |                                                                              |==========================================                            |  60%  |                                                                              |==========================================                            |  61%  |                                                                              |===========================================                           |  61%  |                                                                              |===========================================                           |  62%  |                                                                              |============================================                          |  62%  |                                                                              |============================================                          |  63%  |                                                                              |============================================                          |  64%  |                                                                              |=============================================                         |  64%  |                                                                              |=============================================                         |  65%  |                                                                              |==============================================                        |  65%  |                                                                              |==============================================                        |  66%  |                                                                              |===============================================                       |  66%  |                                                                              |===============================================                       |  67%  |                                                                              |===============================================                       |  68%  |                                                                              |================================================                      |  68%  |                                                                              |================================================                      |  69%  |                                                                              |=================================================                     |  69%  |                                                                              |=================================================                     |  70%  |                                                                              |=================================================                     |  71%  |                                                                              |==================================================                    |  71%  |                                                                              |==================================================                    |  72%  |                                                                              |===================================================                   |  72%  |                                                                              |===================================================                   |  73%  |                                                                              |===================================================                   |  74%  |                                                                              |====================================================                  |  74%  |                                                                              |====================================================                  |  75%  |                                                                              |=====================================================                 |  75%  |                                                                              |=====================================================                 |  76%  |                                                                              |======================================================                |  76%  |                                                                              |======================================================                |  77%  |                                                                              |======================================================                |  78%  |                                                                              |=======================================================               |  78%  |                                                                              |=======================================================               |  79%  |                                                                              |========================================================              |  79%  |                                                                              |========================================================              |  80%  |                                                                              |========================================================              |  81%  |                                                                              |=========================================================             |  81%  |                                                                              |=========================================================             |  82%  |                                                                              |==========================================================            |  82%  |                                                                              |==========================================================            |  83%  |                                                                              |==========================================================            |  84%  |                                                                              |===========================================================           |  84%  |                                                                              |===========================================================           |  85%  |                                                                              |============================================================          |  85%  |                                                                              |============================================================          |  86%  |                                                                              |=============================================================         |  86%  |                                                                              |=============================================================         |  87%  |                                                                              |=============================================================         |  88%  |                                                                              |==============================================================        |  88%  |                                                                              |==============================================================        |  89%  |                                                                              |===============================================================       |  89%  |                                                                              |===============================================================       |  90%  |                                                                              |===============================================================       |  91%  |                                                                              |================================================================      |  91%  |                                                                              |================================================================      |  92%  |                                                                              |=================================================================     |  92%  |                                                                              |=================================================================     |  93%  |                                                                              |=================================================================     |  94%  |                                                                              |==================================================================    |  94%  |                                                                              |==================================================================    |  95%  |                                                                              |===================================================================   |  95%  |                                                                              |===================================================================   |  96%  |                                                                              |====================================================================  |  96%  |                                                                              |====================================================================  |  97%  |                                                                              |====================================================================  |  98%  |                                                                              |===================================================================== |  98%  |                                                                              |===================================================================== |  99%  |                                                                              |======================================================================|  99%  |                                                                              |======================================================================| 100%
dist_to_road_m <- sapply(nearest_roads$dist, `[`, 1)

#same as before: add to grid
mw_100m_grid_sf$dist_to_road_m <- dist_to_road_m

#check plot
mw_100m_grid_sf %>%
  mutate(x = st_coordinates(st_centroid(.))[, 1],
         y = st_coordinates(st_centroid(.))[, 2]) %>%
  ggplot() +
  geom_tile(aes(x=x, y=y, fill=dist_to_road_m)) +
  scale_fill_viridis_c(option = "G", direction = -1) +
  theme(panel.background = element_rect(colour = "grey78")) +
  theme_ggdist() +
  theme(panel.background = element_rect(colour = "grey78"))
## Warning: There were 2 warnings in `stopifnot()`.
## The first warning was:
## ℹ In argument: `x = st_coordinates(st_centroid(.))[, 1]`.
## Caused by warning:
## ! st_centroid assumes attributes are constant over geometries
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

Join all together into the modelling dataset, and just last tidy-up.

aq_model_data <- st_as_sf(aq_model_data, coords = c("x", "y"), crs = st_crs(mw_100m))

aq_model_data <- st_join(aq_model_data, mw_100m_grid_sf)

aq_model_data <- aq_model_data %>%
  mutate(x = st_coordinates(st_centroid(.))[, 1],
         y = st_coordinates(st_centroid(.))[, 2]) %>%
  st_drop_geometry() %>%
  mutate(building_coverage_pct = building_coverage_pct/100)

Model 1:

“We fitted a spatially smooth regression model for log-transformed PM2.5 concentrations using a Gaussian process (GP) smooth over spatial coordinates (x, y) to capture flexible spatial trends. Seasonal variation was modelled using a Fourier series expansion of day-of-year up to the 3rd harmonic (i.e., sine and cosine terms for annual, semi-annual, and tri-annual cycles) to account for complex seasonal patterns. The model was fitted in a Bayesian framework using brms with a Gaussian likelihood and the cmdstanr backend.”

Note, priors are still causing issues for me, so we will comment out now, and use the defauly stan weakly informative priors

Priors

# 
# priors <- c(
#   prior(normal(0, 1), class = "Intercept"),
#   prior(normal(0,1), class = "b"),
#   prior(student_t(3, 0, 2.5), class = "sigma"),
#   prior(exponential(0.5), class = "sdgp")
# )

Fit prior only model

# 
# m1_prior <- brm(
#     formula = log_pm2_5 ~ gp(x, y, k=15) +
#       sin_doy1 + cos_doy1 +
#       sin_doy2 + cos_doy2 +
#       sin_doy3 + cos_doy3,
#     data = aq_model_data,
#     family = gaussian(),
#     prior = priors,
#     sample_prior = "only",
#     chains = 4, cores = 4,
#     backend = "cmdstanr"
#   )
# 
# 
# summary(m1_prior)
# plot(m1_prior)

Now model with data.

m1 <- brm(
    formula = log_pm2_5 ~ gp(x, y, k=15)  +
    sin_doy1 + cos_doy1 +
    sin_doy2 + cos_doy2 +
    sin_doy3 + cos_doy3,
    data = aq_model_data,
    family = gaussian(),
    #prior = priors,
    chains = 4, cores = 4,
    backend = "cmdstanr"
  )
## Start sampling
## Running MCMC with 4 parallel chains...
## Chain 1 Rejecting initial value:
## Chain 1   Gradient evaluated at the initial value is not finite.
## Chain 1   Stan can't start sampling from this initial value.
## Chain 1 Rejecting initial value:
## Chain 1   Gradient evaluated at the initial value is not finite.
## Chain 1   Stan can't start sampling from this initial value.
## Chain 1 Rejecting initial value:
## Chain 1   Gradient evaluated at the initial value is not finite.
## Chain 1   Stan can't start sampling from this initial value.
## Chain 1 Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2 Rejecting initial value:
## Chain 2   Gradient evaluated at the initial value is not finite.
## Chain 2   Stan can't start sampling from this initial value.
## Chain 2 Rejecting initial value:
## Chain 2   Gradient evaluated at the initial value is not finite.
## Chain 2   Stan can't start sampling from this initial value.
## Chain 2 Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3 Rejecting initial value:
## Chain 3   Gradient evaluated at the initial value is not finite.
## Chain 3   Stan can't start sampling from this initial value.
## Chain 3 Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4 Rejecting initial value:
## Chain 4   Gradient evaluated at the initial value is not finite.
## Chain 4   Stan can't start sampling from this initial value.
## Chain 4 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 1 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 4 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 3 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 1 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 2 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 2 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 3 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 4 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 3 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 4 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 2 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 1 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 3 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 4 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 2 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 3 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 4 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 2 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 1 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 3 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 4 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 2 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 3 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 1 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 4 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 2 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 3 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 4 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 2 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 1 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 3 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 2 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 1 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 4 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 1 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 2 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 2 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 3 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 3 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 4 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 4 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 2 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 1 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 2 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 3 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 4 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 1 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 1 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 2 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 2 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 3 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 4 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 1 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 2 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 2 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 3 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 4 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 1 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 2 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 2 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 3 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 4 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 1 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 2 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 2 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 2 finished in 95.2 seconds.
## Chain 4 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 3 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 1 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 4 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 3 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 1 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 4 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 3 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 1 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 4 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 3 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 1 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 4 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 1 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 3 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 4 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 4 finished in 128.5 seconds.
## Chain 1 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 3 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 3 finished in 129.2 seconds.
## Chain 1 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 1 finished in 135.2 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 122.0 seconds.
## Total execution time: 135.4 seconds.
## Loading required package: rstan
## Warning: package 'rstan' was built under R version 4.3.3
## Loading required package: StanHeaders
## Warning: package 'StanHeaders' was built under R version 4.3.3
## 
## rstan version 2.36.0.9000 (Stan version 2.35.0)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
## 
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
## 
##     extract
summary(m1)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: log_pm2_5 ~ gp(x, y, k = 15) + sin_doy1 + cos_doy1 + sin_doy2 + cos_doy2 + sin_doy3 + cos_doy3 
##    Data: aq_model_data (Number of observations: 3363) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Gaussian Process Hyperparameters:
##              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sdgp(gpxy)       2.79      1.13     1.32     5.63 1.00     3347     2879
## lscale(gpxy)     0.03      0.01     0.01     0.05 1.00     4796     2870
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     2.88      0.18     2.50     3.23 1.00     3462     2039
## sin_doy1     -0.70      0.07    -0.84    -0.57 1.00     3229     2666
## cos_doy1     -0.20      0.06    -0.33    -0.08 1.00     6817     3082
## sin_doy2      0.10      0.05    -0.00     0.20 1.00     3495     2873
## cos_doy2     -0.06      0.06    -0.18     0.06 1.00     3646     2727
## sin_doy3     -0.19      0.04    -0.27    -0.12 1.00     5360     3018
## cos_doy3      0.18      0.04     0.10     0.26 1.00     5226     3360
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.56      0.01     0.55     0.58 1.00     6724     2855
## 
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
conditional_effects(m1)

pp_check(m1)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

plot(m1)

Compare prior-only model to model with data using priorsense package:

# m1_draws <- as_draws_df(m1)
# 
# # Extract prior and posterior draws
# powerscale_sensitivity(m1, variable = c("b_Intercept", "b_sin_doy", "b_cos_doy", 
#                                         "sdgp_gpxy", "sigma", "Intercept"))
# 
# powerscale_plot_dens(m1, variable = c("b_Intercept", "b_sin_doy", "b_cos_doy", 
#                                         "sdgp_gpxy", "sigma", "Intercept"))

Predictions by space and time. Here, although we only have data within clusters, we will predict for cells outside of clusters based on covariates. We will also predict for the month for whcih we do not have data.

#set_up prediction date frame
nd_m1 <- st_as_sf(mw_100m_cropped)  %>%
  mutate(centroid = st_centroid(geometry)) %>%
  mutate(x = st_coordinates(centroid)[, 1],
         y = st_coordinates(centroid)[, 2]) %>%
  select(-centroid)  %>%
  st_drop_geometry() %>%
  rename(pop_density = 1) %>%
  mutate(pop_density_km2 = pop_density / 0.01)

#get the first day of each month for prediction
first_days <- ymd(paste0("2020-", sprintf("%02d", 1:12), "-01"))
first_day_doy <- yday(first_days)

#calculate Fourier terms
first_day_seasonality <- tibble(
  mean_doy = first_day_doy,
  sin_doy1 = sin(2 * pi * mean_doy / 365.25),
  cos_doy1 = cos(2 * pi * mean_doy / 365.25),
  sin_doy2 = sin(4 * pi * mean_doy / 365.25),
  cos_doy2 = cos(4 * pi * mean_doy / 365.25),
  sin_doy3 = sin(6 * pi * mean_doy / 365.25),
  cos_doy3 = cos(6 * pi * mean_doy / 365.25),
  date = first_days
)

#expand grid for prediction
nd_m1 <- nd_m1 %>%
  crossing(first_day_seasonality)

#take posterior draws - note storing as an rvars object for efficiency
#note can only manage this without crashing computer!
#to address later, and improve efficieny for more draws
m1_draws <- add_epred_rvars(object = m1,
                            newdata = nd_m1,
                            ndraws = 200) 

#now extract the summary mean and sd for each month-grid cell combination
m1_epred_array <- posterior::draws_of(m1_draws$.epred)

#compute posterior means per location
m1_draws$.epred_mean <- colMeans(m1_epred_array)

#compute posterior sds per location
m1_draws$.epred_sd <- apply(m1_epred_array, 2, sd)

m1_sum <- m1_draws %>% 
  select(x, y, date, .epred_mean, .epred_sd)


#plot predictions
m1_sum %>%  
  mutate(date = month(date, label=TRUE)) %>%
  ggplot() +
  geom_tile(aes(x = x, y=y, fill=.epred_mean)) +
  geom_sf(data = scale_72_clusters, colour="grey78", fill=NA) +
  scale_fill_viridis_c() +
  facet_wrap(date~.) +
  labs(title = "Log(PM2.5) exposure",
       x="",
       y="") +
  theme_ggdist() +
  theme(panel.background = element_rect(fill=NA, colour="grey78"),
        axis.text.x = element_text(angle = 45, hjust=1),
        strip.background = element_rect(colour = "grey78"))

Now just draw 50 random coordinates, and predict by time to see whether we captured the trend.

#sample coordinate points from the household dataset
#we will fix the small number sampled later!
set.seed(123) 
sampled_points_m1 <- aq_model_data %>%
  sample_n(50) %>%
  select(x, y)

#generate DOY 0–365 and calculate Fourier terms
doy_grid <- tibble(doy = 0:365) %>%
  mutate(
    sin_doy1 = sin(2 * pi * doy / 365),
    cos_doy1 = cos(2 * pi * doy / 365),
    sin_doy2 = sin(4 * pi * doy / 365),
    cos_doy2 = cos(4 * pi * doy / 365),
    sin_doy3 = sin(6 * pi * doy / 365),
    cos_doy3 = cos(6 * pi * doy / 365)
  )

#expand grid across sampled locations
prediction_df_m1 <- sampled_points_m1 %>%
  crossing(doy_grid)

#add predictions
preds_m1 <- add_epred_draws(object = m1, newdata = prediction_df_m1)

#summarise
preds_m1_sum <- preds_m1 %>%
  ungroup() %>%
  group_by(doy) %>%
  summarise(.epred_mean = mean(.epred),
            .lower = quantile(.epred, probs=0.025),
            .upper = quantile(.epred, probs = 0.975))

#GEt the observed data
observed_data <- aq_model_data %>%
  select(mean_doy, log_pm2_5) 

#plot
preds_m1_sum %>%
  ggplot() +
  geom_ribbon(aes(x=doy, ymin=.lower, ymax = .upper), fill="steelblue", alpha=0.3) +
  geom_line(aes(x=doy, y=.epred_mean)) +
  geom_jitter(data = observed_data, aes(x = mean_doy, y = log_pm2_5),
              color = "darkred", alpha = 0.6, width = 0.5, height = 0.0, size = 1.2) +
  labs(title = "Model-estimated log(PM2.5) with empirical measurements",
       subtitle = "Model predictions with 95% CrI and observed data points",
       x = "Day of year",
       y = "log(PM2.5)",
       caption = "Modelled estimates restricted to within clusters") +
  theme_ggdist() +
  theme(panel.background = element_rect(colour = "grey78"),
        legend.position = "none")

Model 2: With covariates

Now we include grid level covariates of distance to the road, population density, and building footprint percent, along witht the mean temp and huidity on the day of measurement (from the purple air monitors)

Again, priors to be fixed later

# priors <- c(
#   prior(normal(0, 1), class = "Intercept"),
#   prior(normal(0,1), class = "b"),
#   prior(student_t(3, 0, 2.5), class = "sigma"),
#   prior(exponential(0.5), class = "sdgp"),
# )


m2 <- brm(
    formula = log_pm2_5 ~ gp(x, y, k=15)  +
      s(mean_temp_c, k=5) + 
      s(mean_current_humidity, k=5) +
      s(pop_density_km2, k=5) +
      s(building_coverage_pct, k=5) +
      s(dist_to_road_m, k=5) +
      sin_doy1 + cos_doy1 +
      sin_doy2 + cos_doy2 +
      sin_doy3 + cos_doy3,
    data = aq_model_data,
    family = gaussian(),
    #prior = priors,
    chains = 4, cores = 4,
    backend = "cmdstanr"
  )
## Start sampling
## Running MCMC with 4 parallel chains...
## Chain 1 Rejecting initial value:
## Chain 1   Gradient evaluated at the initial value is not finite.
## Chain 1   Stan can't start sampling from this initial value.
## Chain 1 Rejecting initial value:
## Chain 1   Gradient evaluated at the initial value is not finite.
## Chain 1   Stan can't start sampling from this initial value.
## Chain 1 Rejecting initial value:
## Chain 1   Gradient evaluated at the initial value is not finite.
## Chain 1   Stan can't start sampling from this initial value.
## Chain 1 Rejecting initial value:
## Chain 1   Gradient evaluated at the initial value is not finite.
## Chain 1   Stan can't start sampling from this initial value.
## Chain 1 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 2 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 3 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 4 Iteration:    1 / 2000 [  0%]  (Warmup) 
## Chain 1 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 1 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 2 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 3 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 4 Iteration:  100 / 2000 [  5%]  (Warmup) 
## Chain 2 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 1 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 3 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 4 Iteration:  200 / 2000 [ 10%]  (Warmup) 
## Chain 2 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 2 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 3 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 4 Iteration:  300 / 2000 [ 15%]  (Warmup) 
## Chain 1 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 3 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 2 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 4 Iteration:  400 / 2000 [ 20%]  (Warmup) 
## Chain 2 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 3 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 4 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 2 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 3 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 1 Iteration:  500 / 2000 [ 25%]  (Warmup) 
## Chain 4 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 2 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 3 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 1 Iteration:  600 / 2000 [ 30%]  (Warmup) 
## Chain 4 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 2 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 3 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 1 Iteration:  700 / 2000 [ 35%]  (Warmup) 
## Chain 4 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 3 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 2 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 2 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 1 Iteration:  800 / 2000 [ 40%]  (Warmup) 
## Chain 4 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 1 Iteration:  900 / 2000 [ 45%]  (Warmup) 
## Chain 3 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 3 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 4 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 4 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 2 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 1 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
## Chain 1 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
## Chain 3 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 4 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 2 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 1 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
## Chain 3 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 4 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 2 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 1 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
## Chain 3 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 4 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 1 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
## Chain 2 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 1 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 3 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 4 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
## Chain 2 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 1 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 3 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 4 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
## Chain 2 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 1 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 3 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 4 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
## Chain 2 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 1 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 3 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 4 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
## Chain 1 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 2 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 3 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 4 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
## Chain 1 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 2 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 1 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 1 finished in 222.7 seconds.
## Chain 3 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 4 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
## Chain 2 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 2 finished in 227.2 seconds.
## Chain 4 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 3 Iteration: 2000 / 2000 [100%]  (Sampling) 
## Chain 4 finished in 235.2 seconds.
## Chain 3 finished in 235.3 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 230.1 seconds.
## Total execution time: 235.7 seconds.
## Warning: 2 of 4000 (0.0%) transitions ended with a divergence.
## See https://mc-stan.org/misc/warnings for details.
summary(m2)
## Warning: There were 2 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: log_pm2_5 ~ gp(x, y, k = 15) + s(mean_temp_c, k = 5) + s(mean_current_humidity, k = 5) + s(pop_density_km2, k = 5) + s(building_coverage_pct, k = 5) + s(dist_to_road_m, k = 5) + sin_doy1 + cos_doy1 + sin_doy2 + cos_doy2 + sin_doy3 + cos_doy3 
##    Data: aq_model_data (Number of observations: 3363) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Smoothing Spline Hyperparameters:
##                               Estimate Est.Error l-95% CI u-95% CI Rhat
## sds(smean_temp_c_1)               1.76      0.95     0.71     4.29 1.00
## sds(smean_current_humidity_1)     1.33      0.85     0.28     3.52 1.00
## sds(spop_density_km2_1)           0.83      0.74     0.04     2.80 1.00
## sds(sbuilding_coverage_pct_1)     0.63      0.60     0.02     2.28 1.00
## sds(sdist_to_road_m_1)            0.35      0.41     0.01     1.46 1.00
##                               Bulk_ESS Tail_ESS
## sds(smean_temp_c_1)               2925     2810
## sds(smean_current_humidity_1)     2165     1658
## sds(spop_density_km2_1)           1539     2334
## sds(sbuilding_coverage_pct_1)     1008     1605
## sds(sdist_to_road_m_1)            2455     3196
## 
## Gaussian Process Hyperparameters:
##              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sdgp(gpxy)       2.87      1.15     1.32     5.68 1.00     3203     2939
## lscale(gpxy)     0.02      0.01     0.01     0.04 1.00     6285     3235
## 
## Regression Coefficients:
##                          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                    2.86      0.18     2.50     3.20 1.00     5001
## sin_doy1                    -0.77      0.07    -0.90    -0.64 1.00     4401
## cos_doy1                    -0.50      0.07    -0.64    -0.36 1.00     8350
## sin_doy2                     0.09      0.05    -0.01     0.19 1.00     5458
## cos_doy2                    -0.04      0.06    -0.16     0.08 1.00     4261
## sin_doy3                    -0.17      0.04    -0.24    -0.10 1.00     6149
## cos_doy3                     0.11      0.04     0.03     0.19 1.00     5043
## smean_temp_c_1               3.16      0.58     2.03     4.32 1.00     8155
## smean_current_humidity_1     1.51      0.42     0.69     2.33 1.00     6041
## spop_density_km2_1           0.43      0.71    -0.74     1.98 1.00     3379
## sbuilding_coverage_pct_1     0.46      0.36    -0.13     1.21 1.00     2491
## sdist_to_road_m_1           -0.24      0.37    -0.92     0.61 1.00     4546
##                          Tail_ESS
## Intercept                    2367
## sin_doy1                     3029
## cos_doy1                     2928
## sin_doy2                     3364
## cos_doy2                     3476
## sin_doy3                     3219
## cos_doy3                     3192
## smean_temp_c_1               2771
## smean_current_humidity_1     3460
## spop_density_km2_1           2859
## sbuilding_coverage_pct_1     3193
## sdist_to_road_m_1            3239
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.54      0.01     0.53     0.56 1.00     5985     2792
## 
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
conditional_effects(m2)

pp_check(m2)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

plot(m2)

Predictions by space and time

#first we need a prediction dataframe with all of the covariate data

#extract month from date variable
aq_model_data <- aq_model_data  %>%
  mutate(month = lubridate::month(as.Date(mean_doy, origin = "2019-12-31")))

#calculate monthly means
monthly_means <- aq_model_data %>%
  group_by(month) %>%
  summarise(
    mean_temp_c = mean(mean_temp_c, na.rm = TRUE),
    mean_current_humidity = mean(mean_current_humidity, na.rm = TRUE)
  )

#No data collection in April - here we will just interpolate
#can do better later...
march_temp <- monthly_means %>% filter(month == 3) %>% pull(mean_temp_c)
may_temp <- monthly_means %>% filter(month == 5) %>% pull(mean_temp_c)
april_temp <- (march_temp + may_temp) / 2

march_humidity <- monthly_means %>% filter(month == 3) %>% pull(mean_current_humidity)
may_humidity <- monthly_means %>% filter(month == 5) %>% pull(mean_current_humidity)
april_humidity <- (march_humidity + may_humidity) / 2

#add these into the prediction dataframe
monthly_means <- monthly_means %>%
  add_row(month=4, 
          mean_temp_c = april_temp,
          mean_current_humidity = april_humidity) %>%
  arrange(month)

#set_up prediction date frame
nd_m2 <- mw_100m_grid_sf %>%
  mutate(x = st_coordinates(st_centroid(.))[, 1],
         y = st_coordinates(st_centroid(.))[, 2]) %>%
  st_drop_geometry() %>%
  rename(pop_density = 1) %>%
  mutate(pop_density_km2 = pop_density / 0.01)
## Warning: There were 2 warnings in `stopifnot()`.
## The first warning was:
## ℹ In argument: `x = st_coordinates(st_centroid(.))[, 1]`.
## Caused by warning:
## ! st_centroid assumes attributes are constant over geometries
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
#get the first day of each month for prediction
first_days <- ymd(paste0("2020-", sprintf("%02d", 1:12), "-01"))
first_day_doy <- yday(first_days)

#calculate Fourier terms
first_day_seasonality <- tibble(
  mean_doy = first_day_doy,
  sin_doy1 = sin(2 * pi * mean_doy / 365.25),
  cos_doy1 = cos(2 * pi * mean_doy / 365.25),
  sin_doy2 = sin(4 * pi * mean_doy / 365.25),
  cos_doy2 = cos(4 * pi * mean_doy / 365.25),
  sin_doy3 = sin(6 * pi * mean_doy / 365.25),
  cos_doy3 = cos(6 * pi * mean_doy / 365.25),
  date = first_days
) %>%
  mutate(month = lubridate::month(as.Date(mean_doy, origin = "2019-12-31"))) %>%
  left_join(monthly_means)
## Joining with `by = join_by(month)`
nd_m2 <- nd_m2 %>%
  crossing(first_day_seasonality)

#take posterior draws - note storing as an rvars object for efficiency
m2_draws <- add_epred_rvars(object = m2,
                            newdata = nd_m2,
                            ndraws = 200) #note can only manage this without crashing computer!

#now extract the summary mean and sd for each month-grid cell combination
m2_epred_array <- posterior::draws_of(m2_draws$.epred)

#compute posterior means per location
m2_draws$.epred_mean <- colMeans(m2_epred_array)

#compute posterior sd per location
m2_draws$.epred_sd <- apply(m2_epred_array, 2, sd)

m2_sum <- m2_draws %>% 
  select(x, y, date, .epred_mean, .epred_sd)


#plot predictions
m2_sum %>%  
  mutate(date = month(date, label = TRUE)) %>%
  ggplot() +
  geom_tile(aes(x = x, y=y, fill=.epred_mean)) +
  geom_sf(data = scale_72_clusters, colour="grey78", fill=NA) +
  #geom_sf(data = main_roads_cropped, colour="white") + #roads looking a bit messy when plotted - work on this.
  scale_fill_viridis_c() +
  facet_wrap(date~.) +
  labs(title = "Log(PM2.5) exposure",
       x="",
       y="",
       caption = "White boundaries are SCALE study clusters") +
  theme_ggdist() +
  theme(panel.background = element_rect(fill=NA, colour="grey78"),
        strip.background = element_rect(colour = "grey78"),
        axis.text.x = element_text(angle=45, hjust=1))

#plot sd
m2_sum %>%  
  mutate(date = month(date, label = TRUE)) %>%
  ggplot() +
  geom_tile(aes(x = x, y=y, fill=.epred_sd)) +
  geom_sf(data = scale_72_clusters, colour="grey78", fill=NA) +
  #geom_sf(data = main_roads_cropped, colour="white") +
  scale_fill_viridis_c(option = "F") +
  facet_wrap(date~.) +
  labs(title = "Log(PM2.5) exposure",
       x="",
       y="",
       caption = "White boundaries are SCALE study clusters") +
  theme_ggdist() +
  theme(panel.background = element_rect(fill=NA, colour="grey78"),
        strip.background = element_rect(colour = "grey78"),
        axis.text.x = element_text(angle=45, hjust=1))

Sample coordinates, and plot over time

#sample coordinate points
#will sort out the small number later
set.seed(123)
sampled_points_m2 <- aq_model_data %>%
  sample_n(50) %>%
  select(x, y, mean_temp_c, mean_current_humidity, pop_density_km2, building_coverage_pct, dist_to_road_m, grid_id, log_pm2_5)

#generate DOY 0–365 and calculate Fourier terms
doy_grid <- tibble(doy = seq(0,365, by=1)) %>%
  mutate(
    sin_doy1 = sin(2 * pi * doy / 365),
    cos_doy1 = cos(2 * pi * doy / 365),
    sin_doy2 = sin(4 * pi * doy / 365),
    cos_doy2 = cos(4 * pi * doy / 365),
    sin_doy3 = sin(6 * pi * doy / 365),
    cos_doy3 = cos(6 * pi * doy / 365)
  )

#expand grid across sampled locations
prediction_df_m2 <- sampled_points_m2 %>%
  crossing(doy_grid)

#add predictions
preds_m2 <- add_epred_draws(object = m2, newdata = prediction_df_m2, ndraws = 100)

#observed data for plotting
observed_data <- aq_model_data %>%
  select(mean_doy, log_pm2_5) %>%
  mutate(mean_doy = round(mean_doy))

#plot
preds_m2 %>%
  ungroup() %>%
  ggplot(aes(x=doy)) +
  stat_lineribbon(aes(y=.epred), .width=0.95) + 
  geom_jitter(data = observed_data, aes(x = mean_doy, y = log_pm2_5),
              color = "darkred", alpha = 0.6, width = 0.5, height = 0.0, size = 1.2) +
  scale_fill_brewer() +
  labs(title = "Model-estimated log(PM2.5) with empirical measurements",
       subtitle = "Model predictions with 95% CrI and observed data points",
       x = "Day of year",
       y = "log(PM2.5)",
       caption = "Modelled estimates restricted to within clusters") +
  theme_ggdist() +
  theme(panel.background = element_rect(colour = "grey78"),
        legend.position = "none")

Compare models, using LOO CV

library(loo)
## Warning: package 'loo' was built under R version 4.3.3
## This is loo version 2.8.0.9000
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
## 
## Attaching package: 'loo'
## The following object is masked from 'package:rstan':
## 
##     loo
loo_m1 <- loo(m1)
loo_m2 <- loo(m2)

loo_compare(loo_m1, loo_m2)
##    elpd_diff se_diff
## m2    0.0       0.0 
## m1 -114.8      18.5

TODO